library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
"%&%" = function(a,b) paste(a,b,sep="")
my.dir = "/home/lauren/files_for_revisions_plosgen/fst_results/"
#my.dir = "~/mount/lauren/files_for_revisions_plosgen/fst_results/"
AFA
afa <- fread(my.dir %&% "fst_table_AFA.txt")
mean_afa <- afa[,list(mean_fstCAUafa=mean(fstCAUafa, na.rm = TRUE),mean_fstAFAhis=mean(fstAFAhis, na.rm = TRUE),
R2_AFA=mean(R2_AFA, na.rm = TRUE), R2_CAU=mean(R2_CAU, na.rm = TRUE),
R2_HIS=mean(R2_HIS, na.rm = TRUE),
betaAFA_fstCAUafa=sum(fstCAUafa * abs(betaAFA), na.rm = TRUE)/table(is.na(fstCAUafa * betaAFA))[[1]],
betaAFA_fstAFAhis=sum(fstAFAhis * abs(betaAFA), na.rm = TRUE)/table(is.na(fstAFAhis * betaAFA))[[1]]),
by=GENE]
mean_afa_0.2 <- dplyr::filter(mean_afa, R2_AFA > 0.2 | R2_CAU > 0.2)
ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_CAU,y=betaAFA_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_CAU,y=mean_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 6 rows containing non-finite values (stat_density2d).
## Warning: Removed 6 rows containing missing values (geom_point).

mean_afa_0.2 <- dplyr::filter(mean_afa, R2_AFA > 0.2 | R2_HIS > 0.2)
ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_HIS,y=betaAFA_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_afa_0.2, aes(x=R2_AFA-R2_HIS,y=mean_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 2 rows containing non-finite values (stat_density2d).
## Warning: Removed 2 rows containing missing values (geom_point).

CAU
cau <- fread(my.dir %&% "fst_table_CAU.txt")
mean_cau <- cau[,list(mean_fstCAUafa=mean(fstCAUafa, na.rm = TRUE),mean_fstHIScau=mean(fstHIScau, na.rm = TRUE),
R2_AFA=mean(R2_AFA, na.rm = TRUE), R2_CAU=mean(R2_CAU, na.rm = TRUE),
R2_HIS=mean(R2_HIS, na.rm = TRUE),
betaCAU_fstCAUafa=sum(fstCAUafa * abs(betaCAU), na.rm = TRUE)/table(is.na(fstCAUafa * betaCAU))[[1]],
betaCAU_fstHIScau=sum(fstHIScau * abs(betaCAU), na.rm = TRUE)/table(is.na(fstHIScau * betaCAU))[[1]]),
by=GENE]
mean_cau_0.2 <- dplyr::filter(mean_cau, R2_AFA > 0.2 | R2_CAU > 0.2)
ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_AFA,y=betaCAU_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_AFA,y=mean_fstCAUafa)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 6 rows containing non-finite values (stat_density2d).
## Warning: Removed 6 rows containing missing values (geom_point).

mean_cau_0.2 <- dplyr::filter(mean_cau, R2_CAU > 0.2 | R2_HIS > 0.2)
ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_HIS,y=betaCAU_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_cau_0.2, aes(x=R2_CAU-R2_HIS,y=mean_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 1 rows containing non-finite values (stat_density2d).
## Warning: Removed 1 rows containing missing values (geom_point).

HIS
his <- fread(my.dir %&% "fst_table_HIS.txt")
mean_his <- his[,list(mean_fstAFAhis=mean(fstAFAhis, na.rm = TRUE),mean_fstHIScau=mean(fstHIScau, na.rm = TRUE),
R2_AFA=mean(R2_AFA, na.rm = TRUE), R2_CAU=mean(R2_CAU, na.rm = TRUE),
R2_HIS=mean(R2_HIS, na.rm = TRUE),
betaHIS_fstAFAhis=sum(fstAFAhis * abs(betaHIS), na.rm = TRUE)/table(is.na(fstAFAhis * betaHIS))[[1]],
betaHIS_fstHIScau=sum(fstHIScau * abs(betaHIS), na.rm = TRUE)/table(is.na(fstHIScau * betaHIS))[[1]]),
by=GENE]
mean_his_0.2 <- dplyr::filter(mean_his, R2_AFA > 0.2 | R2_HIS > 0.2)
ggplot(mean_his_0.2, aes(x=R2_HIS-R2_AFA,y=betaHIS_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_his_0.2, aes(x=R2_HIS-R2_AFA,y=mean_fstAFAhis)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 2 rows containing non-finite values (stat_density2d).
## Warning: Removed 2 rows containing missing values (geom_point).

mean_his_0.2 <- dplyr::filter(mean_his, R2_CAU > 0.2 | R2_HIS > 0.2)
ggplot(mean_his_0.2, aes(x=R2_HIS-R2_CAU,y=betaHIS_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")

ggplot(mean_his_0.2, aes(x=R2_HIS-R2_CAU,y=mean_fstHIScau)) + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: Removed 1 rows containing non-finite values (stat_density2d).
## Warning: Removed 1 rows containing missing values (geom_point).

facet_wrap plots
- Perform a 2D kernel density estimation using kde2d and display the results with contours. This can be useful for dealing with overplotting. This is a 2d version of geom_density.
- kde2d: Two-dimensional kernel density estimation with an axis-aligned bivariate normal kernel, evaluated on a square grid.
afa_cau <- dplyr::select(mean_afa_0.2, GENE, pop1=R2_AFA, pop2=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaAFA_fstCAUafa) %>%
mutate(pop="AFA-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
afa_his <- dplyr::select(mean_afa_0.2, GENE, pop1=R2_AFA, pop2=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaAFA_fstAFAhis) %>%
mutate(pop="AFA-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
cau_afa <- dplyr::select(mean_cau_0.2, GENE, pop2=R2_AFA, pop1=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaCAU_fstCAUafa) %>%
mutate(pop="CAU-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
cau_his <- dplyr::select(mean_cau_0.2, GENE, pop1=R2_CAU, pop2=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaCAU_fstHIScau) %>%
mutate(pop="CAU-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
his_afa <- dplyr::select(mean_his_0.2, GENE, pop2=R2_AFA, pop1=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaHIS_fstAFAhis) %>%
mutate(pop="HIS-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
his_cau <- dplyr::select(mean_his_0.2, GENE, pop2=R2_CAU, pop1=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaHIS_fstHIScau) %>%
mutate(pop="HIS-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
all <- rbind(afa_cau,afa_his,cau_afa,cau_his,his_afa,his_cau)
all <- mutate(all,pop=factor(pop,levels=c("AFA-CAU","CAU-AFA","AFA-HIS","HIS-AFA","HIS-CAU","CAU-HIS")))
ggplot(all, aes(x=diffR2,y=beta_Fst)) + facet_wrap(~pop,nrow=3) +
geom_point(col="gray") + geom_density_2d() +
labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("weighted mean ", F[ST]))) +
theme_bw(14)
## Warning: Removed 65 rows containing non-finite values (stat_density2d).
## Warning: Removed 65 rows containing missing values (geom_point).

ggplot(all, aes(x=diffR2,y=beta_Fst)) + facet_wrap(~pop,nrow=3) +
geom_point(col="gray") + geom_density_2d() +
labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("weighted mean ", F[ST]))) +
theme_bw(14) + coord_cartesian(ylim=c(0,0.02))
## Warning: Removed 65 rows containing non-finite values (stat_density2d).
## Warning: Removed 65 rows containing missing values (geom_point).

ggplot(all, aes(x=diffR2,y=mean_Fst)) + facet_wrap(~pop,nrow=3) + geom_point(col='gray') + geom_density_2d() + labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("mean ", F[ST]))) + theme_bw(14)
## Warning: Removed 106 rows containing non-finite values (stat_density2d).
## Warning: Removed 106 rows containing missing values (geom_point).

suball <- dplyr::filter(all, pop=='AFA-CAU' | pop=='AFA-HIS' | pop=='CAU-HIS')
c <- ggplot(suball, aes(x=diffR2,y=mean_Fst)) + facet_wrap(~pop,nrow=1) + geom_point(col='gray') + geom_density_2d() + labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("mean ", F[ST])),title='C') + theme_bw(14) + coord_cartesian(xlim=c(-0.85,0.85)) + theme(plot.margin=unit(c(0,0.2,0,0.2), "cm"))
print(c)
## Warning: Removed 53 rows containing non-finite values (stat_density2d).
## Warning: Removed 53 rows containing missing values (geom_point).

no filter
afa_cau <- dplyr::select(mean_afa, GENE, pop1=R2_AFA, pop2=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaAFA_fstCAUafa) %>%
mutate(pop="AFA-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
afa_his <- dplyr::select(mean_afa, GENE, pop1=R2_AFA, pop2=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaAFA_fstAFAhis) %>%
mutate(pop="AFA-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
cau_afa <- dplyr::select(mean_cau, GENE, pop2=R2_AFA, pop1=R2_CAU, mean_Fst=mean_fstCAUafa, beta_Fst=betaCAU_fstCAUafa) %>%
mutate(pop="CAU-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
cau_his <- dplyr::select(mean_cau, GENE, pop1=R2_CAU, pop2=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaCAU_fstHIScau) %>%
mutate(pop="CAU-HIS",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
his_afa <- dplyr::select(mean_his, GENE, pop2=R2_AFA, pop1=R2_HIS, mean_Fst=mean_fstAFAhis, beta_Fst=betaHIS_fstAFAhis) %>%
mutate(pop="HIS-AFA",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
his_cau <- dplyr::select(mean_his, GENE, pop2=R2_CAU, pop1=R2_HIS, mean_Fst=mean_fstHIScau, beta_Fst=betaHIS_fstHIScau) %>%
mutate(pop="HIS-CAU",diffR2 = pop1 - pop2) %>% select(pop, GENE, pop1, pop2, diffR2, mean_Fst, beta_Fst)
all_nof <- rbind(afa_cau,afa_his,cau_afa,cau_his,his_afa,his_cau)
ggplot(all_nof, aes(x=diffR2,y=pop1,col=pop2)) + facet_wrap(~pop,nrow=3) + geom_point() + labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2))) + theme_bw(14) +
geom_density_2d()
## Warning: Removed 3572 rows containing non-finite values (stat_density2d).
## Warning: Removed 3572 rows containing missing values (geom_point).

suball_nof <- dplyr::filter(all_nof, pop=='AFA-CAU' | pop=='AFA-HIS' | pop=='CAU-HIS')
b <- ggplot(suball_nof, aes(x=diffR2,y=pop1,col=pop2)) + facet_wrap(~pop,nrow=1) + geom_point() + labs(x=expression(paste(R^2, " difference (pop1 - pop2)")),y=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)),title='B') + theme_bw(14) +
scale_color_gradientn(colours = c('blue','pink')) +
theme(legend.justification=c(0,1), legend.position=c(0.005,0.995),legend.title = element_text(size=10),legend.text = element_text(size=10),legend.key.size = unit(0.3, "cm"),plot.margin=unit(c(0,0.2,0,0.2), "cm")) +
coord_cartesian(xlim=c(-0.85,0.85)) + geom_density_2d(col='gray') + scale_size(range=c(5,20))
print(b)
## Warning: Removed 1786 rows containing non-finite values (stat_density2d).
## Warning: Removed 1786 rows containing missing values (geom_point).

a <- ggplot(suball_nof, aes(x=pop1,y=pop2)) + geom_point() + geom_smooth(method="lm") + facet_wrap(~pop,nrow=1) + theme_bw(14)+
labs(x=expression(paste("pop1 ", R^2)),y=expression(paste("pop2 ", R^2)),title='A') + geom_density_2d(col="gray") +
theme(plot.margin=unit(c(0.05,0.2,0,0.2), "cm"))
print(a)
## Warning: Removed 1786 rows containing non-finite values (stat_smooth).
## Warning: Removed 1786 rows containing non-finite values (stat_density2d).
## Warning: Removed 1786 rows containing missing values (geom_point).

grid.arrange(a, b, c, nrow=3)
## Warning: Removed 1786 rows containing non-finite values (stat_smooth).
## Warning: Removed 1786 rows containing non-finite values (stat_density2d).
## Warning: Removed 1786 rows containing missing values (geom_point).
## Warning: Removed 1786 rows containing non-finite values (stat_density2d).
## Warning: Removed 1786 rows containing missing values (geom_point).
## Warning: Removed 53 rows containing non-finite values (stat_density2d).
## Warning: Removed 53 rows containing missing values (geom_point).

ggplot(all,aes(x=pop1,y=mean_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) +
scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) +
labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))
## Warning: Removed 106 rows containing non-finite values (stat_density2d).
## Warning: Removed 106 rows containing missing values (geom_point).

ggplot(all,aes(x=pop1,y=beta_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) +
scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) +
labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))

ggplot(all,aes(x=pop1,y=beta_Fst,col=pop2)) + geom_point() + geom_density_2d(col='gray') + facet_wrap(~pop,nrow=3) +
scale_color_gradientn(colours = c('blue','pink')) + theme_bw(14) + coord_cartesian(ylim=c(0,0.02)) +
labs(x=expression(paste("pop1 ", R^2)),col=expression(paste("pop2 ", R^2)))

ggplot(all,aes(x=mean_Fst,col=pop)) + geom_density() + coord_cartesian(xlim=c(0,0.2))
## Warning: Removed 106 rows containing non-finite values (stat_density).

ggplot(all,aes(x=beta_Fst,col=pop)) + geom_density() + coord_cartesian(xlim=c(0,0.01))
